# -*- coding: utf-8 -*-

import urllib
import urllib2
import re
import sys
from bs4 import BeautifulSoup

# Python 2-only idiom: reloading ``sys`` makes ``setdefaultencoding``
# callable here, after which UTF-8 is the implicit codec for str/unicode
# conversions.
# NOTE(review): presumably needed because the output line further down
# concatenates UTF-8 byte-string literals with text parsed from the page --
# confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

# Page to scrape and the browser-like User-Agent header sent with the request.
url = "https://www.qiushibaike.com/hot/page/1"
user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36"
header = {'User-Agent': user_agent}

# First run of digits that does not start with zero; applied to the author's
# profile link to pull out the user ID. Compiled once instead of per article.
user_id_pattern = re.compile(r'[1-9]\d*')


def _first(node, selector):
    """Return the first element under ``node`` matching ``selector``, or None."""
    matches = node.select(selector)
    if matches:
        return matches[0]
    return None


def _text_of(node):
    """Return all text inside ``node``, or an empty string when it is None.

    ``get_text()`` is used rather than ``.string`` because ``.string`` is
    None for a tag with more than one child, which would break the string
    concatenation that builds the output line.
    """
    if node is None:
        return ''
    return node.get_text()


def _describe_article(item):
    """Build the one-line summary printed for a single ``.article`` element.

    Any piece of markup that is absent yields an empty field instead of
    raising, so one unusual post does not abort the whole run.
    """
    # User name and avatar URL both come from the author's <img>.
    img = _first(item, ".author a img")
    username = img.get('alt', '') if img is not None else ''
    avatar = img.get('src', '') if img is not None else ''

    # User ID: digits embedded in the author's profile link.
    user_id = ''
    link = _first(item, ".author a")
    if link is not None:
        match = user_id_pattern.search(link.get('href', ''))
        if match is not None:
            user_id = match.group(0)

    # Gender: decided by the CSS classes on the first <div> under .author.
    # A missing <div> is treated like an unrecognised class list.
    badge = _first(item, ".author div")
    sexclass = badge.get('class', []) if badge is not None else []
    if 'manIcon' in sexclass:
        sex = '男'
    elif 'womenIcon' in sexclass:
        sex = '女'
    else:
        sex = '人妖'

    # Age
    age = _text_of(_first(item, ".author .articleGender"))

    # Post body
    text = _text_of(_first(item, ".contentHerf .content span"))

    return "user id:" + user_id + "  姓名："+username+" 头像地址："+avatar+" 性别："+sex+" 年龄："+age+"岁 内容："+text


try:
    # Only the network round-trip lives inside the try, so the handler below
    # deals with fetch failures and nothing else.
    request = urllib2.Request(url, headers=header)
    response = urllib2.urlopen(request)
    try:
        content = response.read().decode('utf-8')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
except urllib2.URLError as e:
    if hasattr(e, 'code'):
        print(e.code)
    if hasattr(e, 'reason'):
        print(e.reason)
else:
    # Name the parser explicitly so the parse does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')
    lists = soup.select('.article')
    for item in lists:
        print(_describe_article(item))
